# -*- coding:utf-8 -*- 
import html

import requests
from bs4 import BeautifulSoup

def get_book_data(url, book_list, htmlfile):
    """Fetch one listing page and append its books to an HTML table.

    Each book found on the page is printed to stdout and written to
    ``htmlfile`` as a single ``<tr>`` row with title, description and score.

    Args:
        url: Address of the listing page to download.
        book_list: Unused here; kept so existing callers keep working.
        htmlfile: Writable text file object that receives the table rows.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            does not complete within the timeout.
    """
    # Send a browser-like User-Agent to avoid anti-crawler restrictions.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
    }
    # Without a timeout a stalled connection would hang the crawl forever.
    douban_data = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(douban_data.text, "html.parser")

    titles = soup.select('a.title')  # book titles
    descriptions = soup.select('div.desc')  # book descriptions
    scores = soup.select('span.rating_nums')  # book ratings

    # zip() stops at the shortest of the three lists.
    # NOTE(review): if a book on the page lacks a description or a score,
    # the lists may fall out of step and rows could be mismatched.
    for title, description, score in zip(titles, descriptions, scores):
        title_text = title.get_text()
        description_text = description.get_text().strip()
        score_text = score.get_text()

        print('书名' + title_text)
        print('描述' + description_text)
        print('评分' + score_text)

        # Escape the scraped text so characters such as "<" or "&" cannot
        # break (or inject markup into) the generated table.
        htmlfile.write(
            '<tr><td>%s</td><td>%s</td><td>%s</td></tr>' % (
                html.escape(title_text),
                html.escape(description_text),
                html.escape(score_text),
            )
        )
        
             
        
        
if __name__ == '__main__':
    # Scrape the books listed under each of these six tags; one HTML file
    # named "<tag>.html" is produced per tag.
    book_list = ['小说', '随笔', '散文', '童话', '诗歌', '名著']
    for tag_name in book_list:
        # The context manager closes the file even if a request raises
        # midway, instead of leaking the open handle.
        with open(tag_name + '.html', 'w', encoding="utf-8") as htmlfile:
            htmlfile.write('<html>')
            htmlfile.write('<head><meta http-equiv="content-type" content="text/html;charset=utf-8"></head>')
            htmlfile.write('<body>')
            htmlfile.write('<table border="1px" cellspacing="0px" style="border-collapse:collapse">')
            htmlfile.write('<tr>')
            htmlfile.write('<th>书名</th>')
            htmlfile.write('<th>描述</th>')
            htmlfile.write('<th>评分</th>')
            htmlfile.write('</tr>')

            # Scrape the first ten pages of each tag; the "start" query
            # parameter advances by 15 per page.
            for page in range(10):
                url = "http://www.douban.com/tag/" + tag_name + "/book?start=" + str(page * 15)
                get_book_data(url, book_list, htmlfile)

            htmlfile.write('</table>')
            htmlfile.write('</body>')
            htmlfile.write('</html>')